
************************************************************************************************
************************************************************************************************
*This .do file creates the student-level graduate data file used for the project

*Inputs:
*1. Graduation data files from the NCERDC from 2009-2019

*Output
*1. "/data_analysis/NC_RD_Retake/long_run_grad_data.dta"
************************************************************************************************
************************************************************************************************

clear all
set more off

* Build one file per graduation year (2009-2018) with one row per student.
* Each file holds: mastid, grad, diploma, g (within-year duplicate tag), year.
forvalues yr = 2009/2018 {
    * Raw files are named with the two-digit year: gradpub09 ... gradpub18.
    local yy = substr("`yr'", 3, 2)
    use "/data/Student/Graduates/gradpub`yy'.dta", clear

    drop if mastid==.

    * grad    = 1 for any certificate or diploma, missing otherwise.
    * diploma = 1 for a diploma, 0 for a certificate, missing otherwise.
    * The raw dc codes vary in case and "CERTIFICA" appears truncated, so every
    * spelling seen in the data is listed explicitly.
    gen grad = 1 if inlist(dc, "CERTIFICA", "CERTIFICATE", "DIPLOMA", "Certificate", "Diploma")
    gen diploma = 1 if inlist(dc, "DIPLOMA", "Diploma")
    replace diploma = 0 if inlist(dc, "CERTIFICA", "CERTIFICATE", "Certificate")
    drop if dc==""

    *Deal with duplicates
    keep mastid grad diploma
    * g is kept in the saved file because the later append step drops it.
    duplicates tag mastid, gen(g)

    * Give every record of a student the best outcome across that student's
    * records BEFORE collapsing to one row. Previously only diploma was
    * aggregated, so grad depended on which duplicate happened to come first
    * and could be missing for a student whose diploma was 1.
    foreach v of varlist grad diploma {
        egen best_`v' = max(`v'), by(mastid)
        replace `v' = best_`v'
        drop best_`v'
    }

    * All rows for a student are now identical, so force discards nothing.
    duplicates drop mastid, force

    gen year = `yr'
    qui compress
    save "/data_analysis/Value Added Generate/output/graduate_`yr'.dta", replace
}
*Do 2019 separately
* Same processing as the earlier years: one row per student with
* mastid, grad, diploma, g (within-year duplicate tag) and year.
clear all
use "/data/Student/Graduates/gradpub19.dta"
drop if mastid==.

* grad    = 1 for any certificate or diploma, missing otherwise.
* diploma = 1 for a diploma, 0 for a certificate, missing otherwise.
gen grad = 1 if inlist(dc, "CERTIFICA", "CERTIFICATE", "DIPLOMA", "Certificate", "Diploma")
gen diploma = 1 if inlist(dc, "DIPLOMA", "Diploma")
replace diploma = 0 if inlist(dc, "CERTIFICA", "CERTIFICATE", "Certificate")
drop if dc==""

keep mastid grad diploma
* g is kept in the saved file because the later append step drops it.
duplicates tag mastid, gen(g)

* Spread the best outcome across each student's records before collapsing.
* Previously only diploma was aggregated, so grad depended on which duplicate
* happened to come first and could be missing when diploma was 1.
foreach v of varlist grad diploma {
    egen best_`v' = max(`v'), by(mastid)
    replace `v' = best_`v'
    drop best_`v'
}

* All rows for a student are now identical, so force discards nothing.
duplicates drop mastid, force

gen year = 2019
qui compress
save "/data_analysis/Value Added Generate/output/graduate_2019.dta", replace


*Append all together
clear all
foreach year of numlist 2009(1)2019 {
    append using "/data_analysis/Value Added Generate/output/graduate_`year'.dta"
}
* g is the within-year duplicate tag carried in the yearly files; year is not
* kept in the output. Neither is needed once the years are stacked.
drop g year

*Assign to a diploma (and to graduate status) if ever got one
* Take the best outcome across all years for each student before reducing to
* one row. Previously only diploma was aggregated, so grad came from whichever
* year's row was kept and could be missing for a student with diploma==1.
foreach v of varlist grad diploma {
    egen best_`v' = max(`v'), by(mastid)
    replace `v' = best_`v'
    drop best_`v'
}

* All rows for a student are now identical, so force discards nothing.
duplicates drop mastid, force
compress
save "/data_analysis/NC_RD_Retake/long_run_grad_data.dta", replace

* Remove the intermediate yearly files only after the final file is saved, so
* a failure above does not destroy inputs that have already been appended.
foreach year of numlist 2009(1)2019 {
    erase "/data_analysis/Value Added Generate/output/graduate_`year'.dta"
}


